* -----------------------------------------------*
* Specify variables, build division level dataset*
* -----------------------------------------------*

* Load the regression dataset and reformat it to fit our simulation layout
use ${d}datasets/final_data/regression_dataset${weight_window}${tf}, clear 

* Sample restriction, applied in one pass: maximum 1995 weight below 1, both
* 1995 missing-weight flags equal to 0, and years 1995 through 2011.
keep if maxweight_1995 < 1 & missing_weights_1995==0 & missing_spill_weights_1995 == 0 & inrange(year, 1995, 2011)

**** Convert stocks and spillovers to levels and to ln(1+x)
* The incoming k* and spill* variables are treated as logs here: each one is
* kept under an ln-prefixed name, its level is recovered with exp() and set to
* 0 wherever the companion zero flag (same name + "0") equals 1, and
* ln(1 + level) is stored under an ln1P-prefixed name.
foreach tech in auto95 tfa pauto95 {
    local kvar     k`tech'_${ttt}
    local spillvar spill`tech'${ttt}_1995_a

    * knowledge stock
    rename `kvar' ln`kvar'
    generate `kvar' = cond(`kvar'0 == 1, 0, exp(ln`kvar'))
    generate ln1P`kvar' = ln(1 + `kvar')

    * spillover
    rename `spillvar' ln`spillvar'
    generate `spillvar' = cond(`spillvar'0 == 1, 0, exp(ln`spillvar'))
    generate ln1P`spillvar' = ln(1 + `spillvar')
}

* Same conversion for the non-tfa ("other/nonmach") stock and spillover:
* keep the log, recover the level (0 where the zero flag is 1), build
* ln(1 + level), and copy the zero flag under the ln1P-prefixed name.
local notk kNOT_tfa_${ttt}
rename `notk' ln`notk'
generate `notk' = cond(`notk'0 == 1, 0, exp(ln`notk'))
generate ln1P`notk' = ln(1 + `notk')
generate ln1P`notk'0 = `notk'0

local notspill spillNtfa${ttt}_1995_a
rename `notspill' ln`notspill'
generate `notspill' = cond(`notspill'0 == 1, 0, exp(ln`notspill'))
generate ln1P`notspill' = ln(1 + `notspill')
generate ln1P`notspill'0 = `notspill'0

* Build / define tfam variables (tfam95 = tfa minus auto95)
* NOTE(review): this series is hard-coded to the _bia suffix while everything
* below uses ${ttt} -- confirm that is intended.
gen tfam95_bia = tfa_bia - auto95_bia

* Stock: level, zero flag, log, and ln(1+x)
gen ktfam95_${ttt} = ktfa_${ttt} - kauto95_${ttt}
* The zero flag is taken from the stock generated on the previous line (it was
* hard-coded to ktfam95_bia, which is only the same variable when ${ttt} is
* "bia"); this matches how the spillover flag is built below.
gen byte ktfam95_${ttt}0 = ktfam95_${ttt} == 0
gen lnktfam95_${ttt} = ln(ktfam95_${ttt})
gen ln1Pktfam95_${ttt} =ln(1+ktfam95_${ttt})

* Spillover: level, zero flag, log, and ln(1+x)
gen spilltfam95${ttt}_1995_a = spilltfa${ttt}_1995_a - spillauto95${ttt}_1995_a
gen byte spilltfam95${ttt}_1995_a0 = spilltfam95${ttt}_1995_a == 0
gen lnspilltfam95${ttt}_1995_a = ln(spilltfam95${ttt}_1995_a)
* exp(ln1P x) equals 1 + x, so the two "+1" terms cancel and the argument is
* 1 + spilltfa - spillauto95.
gen ln1Pspilltfam95${ttt}_1995_a = ln(1+exp(ln1Pspilltfa${ttt}_1995_a) - exp(ln1Pspillauto95${ttt}_1995_a))

* Expand by divisions, specify indicator variables
* Every observation is duplicated: the original row becomes the "auto95"
* division and its copy the "pauto95" division. encode numbers the labels
* alphabetically, so auto95 is coded 1 and pauto95 is coded 2.
expand 2, gen(new)
gen sdivision = cond(new, "pauto95", "auto95")
drop new
encode sdivision, gen(division)
drop sdivision

* Group identifiers used as panel unit and fixed-effect cells
egen firm_division         = group(BvD division)
egen division_year         = group(year division)
egen division_yearindustry = group(year industry division)
* countryyear is only defined through 2009
egen countryyear           = group(year division country_shr_1995) if year <= 2009

*** Samsung x year x division indicators
* 0/1 indicators for the Samsung firm id and for each division code
gen is_Samsung = (BvDIDnumber == "KR1301110006246")
gen is_auto95  = (division == 1)
gen is_pauto95 = (division == 2)

* For every year in the data: a year dummy and, per division, the product
* Samsung x year x division.
levelsof year, local(years)
foreach yr of local years {
    gen year_dummy`yr' = (year == `yr')
    foreach tech in auto95 pauto95 {
        gen ssdiv_year`yr'_`tech' = is_Samsung * year_dummy`yr' * is_`tech'
    }
}

* Drop the building blocks and the 2011 (last-year) interactions
drop *dummy* is_Samsung is_auto95 is_pauto95 ssdiv_year2011_*

* Declare the panel: unit = firm x division, time = year
xtset firm_division year
sort firm_division year

*** Define depvars, stocks and spills in their different versions
* depvar takes the division's own ${ttt} series; k0 takes the division's own
* stock and is filled in 1995 rows only (missing in all other years).
gen depvar = .
gen k0 = .

foreach tech in auto95 pauto95 {
        local in_div division == "`tech'":division
        replace depvar = `tech'_${ttt} if `in_div'
        replace k0 = k`tech'_${ttt} if year == 1995 & `in_div'
}


* Short aliases for the regressors: ln(1+x) stock and spillover plus their
* zero flags, for each division ...
foreach tech in auto95 pauto95 {
    generate stock`tech'          = ln1Pk`tech'_${ttt}
    generate stock`tech'zero      = k`tech'_${ttt}0
    generate spillovers`tech'     = ln1Pspill`tech'${ttt}_1995_a
    generate spillovers`tech'zero = spill`tech'${ttt}_1995_a0
}
* ... and for the non-tfa ("other") category
generate stockother          = ln1PkNOT_tfa_${ttt}
generate stockotherzero      = kNOT_tfa_${ttt}0
generate spilloversother     = ln1PspillNtfa${ttt}_1995_a
generate spilloversotherzero = spillNtfa${ttt}_1995_a0



* Quadratic terms of the spillovers (auto95, pauto95, other)
foreach grp in auto95 pauto95 other {
    gen sqspillovers`grp' = spillovers`grp'^2
}
* Interaction between the two division spillovers
gen spillovers_inter = spilloversauto95 * spilloverspauto95

* Cubic terms of the spillovers
foreach grp in auto95 pauto95 other {
    gen cubicspillovers`grp' = spillovers`grp'^3
}

* Specify macro variables: clone each source variable (stem + _1995_a) under a
* short upper-case name. The two lists are matched position by position.
local macro_names   LSW    HSW    VAEMP    GDPGAP   GDPPC
local macro_sources lswMPm hswMPm vaempMPm lngdpgap gdppcMPm
forvalues i = 1/5 {
    local newname : word `i' of `macro_names'
    local srcstem : word `i' of `macro_sources'
    clonevar `newname' = `srcstem'_1995_a
}
gen ISP = LSW - HSW

* Add minimum wage macro variables (the "x" versions), same scheme
local minw_names   LSWx   HSWx   VAEMPx   GDPGAPx   GDPPCx   MINWx
local minw_sources lswMPx hswMPx vaempMPx lngdpgapx gdppcMPx minwMPx
forvalues i = 1/6 {
    local newname : word `i' of `minw_names'
    local srcstem : word `i' of `minw_sources'
    clonevar `newname' = `srcstem'_1995_a
}

* Sum depvar over both division rows (auto95 and pauto95) of each firm for the
* years 1995+2 through 2009+2, spread that total to every row of the firm, and
* keep the entire firm only if the total is strictly positive.
bys BvD : egen _total_${ttt}_1995 = total(depvar) if year>=1995+2 & year <= 2009+2
bys BvD : egen total_${ttt}_1995 = max(_total_${ttt}_1995)
drop _total_${ttt}_1995
* Stata ranks missing above every number, so a bare "> 0" would also keep a
* firm that has no row inside the window (total is missing for it); exclude
* missing explicitly.
keep if total_${ttt}_1995 > 0 & !missing(total_${ttt}_1995)

* Add BGVR variables
* Initial conditions: for each division, the 1995+1 value of the stock and of
* its zero flag, spread to every row sharing the same lse_id.
foreach tech in auto95 pauto95 {
    tempvar zero_t1 stock_t1
    gen `zero_t1' = stock`tech'zero if year==1995+1
    bys lse_id : egen init_stock`tech'zero = max(`zero_t1')
    gen `stock_t1' = stock`tech' if year==1995+1
    bys lse_id : egen init_stock`tech' = max(`stock_t1')
    drop `zero_t1' `stock_t1'
}

* Averages of the right-hand-side variables by lse_id over 1995-2009 (rows
* outside that range get missing). capture swallows any error, so a failing
* egen silently leaves that avg_ variable uncreated.
foreach rhs of varlist ISP LSW* HSW* VAEMP* GDPGAP* GDPPC* spillover* { 
	capture bysort lse_id : egen avg_`rhs' = mean(`rhs') if year>=1995 & year <= 2009
}


* Clean up and save 
ren country_shr_1995 home

* Variable lists shared by keep and order
local lead_vars firm_division lse_id BvD home division depvar total_${ttt}_1995 *year *yearindustry countryyear stock*
local rest_vars init* spillover* sqspill* cubicspill* k0 MINWx LSW* HSW* ISP VAEMP* GDPGAP* GDPPC* avg_*

* Both commands run under capture: if any listed name is absent the command
* does nothing and the script continues.
cap keep `lead_vars' `rest_vars' ${ssPFE}
cap order `lead_vars' `s_stock' `rest_vars'

sort firm_division year
sleep 100
save ${d}datasets/macrosim/BvD_year_div_${ln_vers}_pauto95_${chosen_spec}.dta, replace


